%{
AUTHOR: Felipe Arteaga
-------------------------------------------------------------------------
PROJECT: Warnings
-------------------------------------------------------------------------
DESCRIPTION:
=========================================================================
%}



% Start from a clean session: clear the workspace and command window, close
% all figures and open files, and set the default character set to UTF-8.
clear;clc;close all;fclose('all');feature('DefaultCharacterSet','UTF-8');

% Machine-specific directories, selected by the operating-system user name.
pcName=char(java.lang.System.getProperty('user.name'));
if(strcmp(pcName,'felipe'))
    % PC Felipe
    myDir='/Users/felipe/Dropbox/';
    projectDir=[myDir,'git/warnings/'];
    projectDirData=[myDir,'projects/warnings/'];
    addpath(genpath([myDir,'/myMatlabFunctions/']));
else
    % Without a matching branch the project directories stay undefined and
    % the script would only fail later with an unrelated "undefined
    % variable" error, so stop here with an actionable message instead.
    error('warnings:unknownUser',...
        'No directory configuration for user "%s". Add a branch that sets myDir, projectDir and projectDirData.',pcName);
end

%% Run configuration

% --- Input / output locations ---
% NOTE(review): dirPlots is left undefined here, but the plotting code
% further down uses it when exporting figures; define it before enabling
% plot export.
%dirPlots=[projectDir,'/paper/figuresCL/RDs/'];
dirTable='/Users/felipe/Dropbox/Mineduc/modelacion/riesgo/informes/informeMineduc2021/tables/';
dirData=[projectDirData,'/data/chile/'];

% --- Heterogeneity split ---
% Values matched by the subpopulation switch below:
%   ''            no split
%   '_vulnerable' by economic vulnerability
%   '_sex'        by sex
%   '_maturity'   by maturity
% Only '' is handled by the final table step.
heter='';

% --- Table options ---
compileLatexTable=true;
bandwidthTable='local'; % 'full', 'local' or 'localAuto'
tableBandwidthComparison=false;
tableDetails=false;

% --- Plot options ---
plotRDs=false;
savePlotRDs=false;
plotWithZoom=false;
plotAndSaveRobust=false;
closePlotsWhilePlotting=true;


%% Load data:
% anho selects the year to analyse; 0 means all years pooled.
anho=0;
anhoStr=''; % stays empty for the pooled run
if(anho~=0)
    anhoStr=sprintf('%i',anho);
end

% Variables that are not available for 2020. Formerly:
% {'assignedToPref','acceptOffer','declineOffer','preferenciaAsign_1','placedInAnyPreferenceIniEnd','placedInAddedIniEnd','placedInAddedNoWaitlistIniEnd','placedInOldIniEnd','enrolledInAssigned','enrolledInAddedIniEnd','enrolledInAddedNoWaitlistIniEnd','enrolledInAddedNoRiskIniEnd'}
notIn2020={};

if(anho~=0)
    % Single year: load that year's table straight into the workspace.
    load([dirData,anhoStr,'/inputRD'],'dataRD')
else
    % Pooled: load each year separately (d1..d4 are kept in the workspace).
    d1=load([dirData,'2018/inputRD'],'dataRD');
    d2=load([dirData,'2019/inputRD'],'dataRD');
    d3=load([dirData,'2020/inputRD'],'dataRD');
    d4=load([dirData,'2021/inputRD'],'dataRD');

    % Variables missing in 2020 are added to the 2020 table as all-NaN
    % columns so they can survive the common-variable intersection below.
    nRows2020=height(d3.dataRD);
    for v=1:numel(notIn2020)
        d3.dataRD.(notIn2020{v})=nan(nRows2020,1);
    end

    % Tag every row with its year.
    d1.dataRD.anho=repmat(2018,height(d1.dataRD),1);
    d2.dataRD.anho=repmat(2019,height(d2.dataRD),1);
    d3.dataRD.anho=repmat(2020,height(d3.dataRD),1);
    d4.dataRD.anho=repmat(2021,height(d4.dataRD),1);

    % Keep only the variables present in all four years, except 'mrun'.
    incommon=d1.dataRD.Properties.VariableNames;
    incommon=intersect(incommon,d2.dataRD.Properties.VariableNames);
    incommon=intersect(incommon,d3.dataRD.Properties.VariableNames);
    incommon=intersect(incommon,d4.dataRD.Properties.VariableNames);
    incommon=incommon(~strcmp(incommon,'mrun'));

    % Stack the four years.
    dataRD=vertcat(d1.dataRD(:,incommon),d2.dataRD(:,incommon),d3.dataRD(:,incommon),d4.dataRD(:,incommon));
end

% Rows with a negative newMarket are flagged as rural.
dataRD.rural=dataRD.newMarket<0;

% Assignment exactly at .3 is control, so move any observation sitting
% exactly on .3 an epsilon to the left.
atCutoff=dataRD.riskPopup==.3;
dataRD.riskPopup(atCutoff)=.29999999;

% Estimation sample: pop-up population, avoiding the mass points at the
% extremes of the running variable.
inPopupPop=dataRD.pobPopup==1;
awayFromExtremes=dataRD.riskPopup>.01&dataRD.riskPopup<.99;
dataRD.restrAll=inPopupPop&awayFromExtremes;

% Short labels; no need to spell out "Predicted placement risk of 1st
% attempt".
dataRD.Properties.VariableDescriptions{'riskPopup'}='Predicted placement risk';
dataRD.Properties.VariableDescriptions{'treatedPopup'}='Treated pop-up in first attempt';


if(false)
   %% Pop-ups relative to the total population (disabled diagnostic)
   % Never runs as part of the script; it needs d1, d2 and d3, which only
   % exist after a pooled load.
   % NOTE(review): n_apps has three entries (presumably the 2018-2020
   % applicant totals - confirm), while the pooled dataRD also stacks 2021,
   % so the "ALL" line may mix four years of data with three years of
   % totals.
   n_apps=[274990,483070,454415];
   totalApps=sum(n_apps);

   pctData2018=height(d1.dataRD)/n_apps(1)*100;
   pctPopup2018=sum(d1.dataRD.pobPopup)/n_apps(1)*100;
   fprintf('2018 In data: %.1f, in pop-up population: %.1f\n',pctData2018,pctPopup2018);

   pctData2019=height(d2.dataRD)/n_apps(2)*100;
   pctPopup2019=sum(d2.dataRD.pobPopup)/n_apps(2)*100;
   fprintf('2019 In data: %.1f, in pop-up population: %.1f\n',pctData2019,pctPopup2019);

   pctData2020=height(d3.dataRD)/n_apps(3)*100;
   pctPopup2020=sum(d3.dataRD.pobPopup)/n_apps(3)*100;
   fprintf('2020 In data: %.1f, in pop-up population: %.1f\n',pctData2020,pctPopup2020);

   pctDataAll=height(dataRD)/totalApps*100;
   pctPopupAll=sum(dataRD.pobPopup)/totalApps*100;
   fprintf('\n ALL  In data: %.1f, in pop-up population: %.1f\n',pctDataAll,pctPopupAll);

end
   
   
   


%% RD

% Defines the subpopulations in which the RDs are calculated for the
% outcomes defined further down.
%
% subpobs has one row per subpopulation and four columns:
%   1) label shown in the table header and in log messages
%   2) logical row mask over dataRD
%   3) short tag (used in plot file names and to detect the 2020 sample)
%   4) endogenous variable for the fuzzy (IV) RD; '' means no IV

if(anho==0) % This calculates estimates pooling all years
    
    switch heter
        case ''
            subpobs=cell(1,4);
            s=1;
            subpobs(s,:)={'Todos',    dataRD.restrAll,'pooled','addAnyIniEnd'};s=s+1;
            subpobs(s,:)={'2018',   dataRD.restrAll&dataRD.anho==2018,'2018',''};s=s+1;
            subpobs(s,:)={'2019',      dataRD.restrAll&dataRD.anho==2019,'2019',''};s=s+1;
            subpobs(s,:)={'2020',    dataRD.restrAll&dataRD.anho==2020,'2020',''};s=s+1;
            subpobs(s,:)={'2021',    dataRD.restrAll&dataRD.anho==2021,'2021',''};s=s+1;
            posAll=1;
        case '_vulnerable'
            
            subpobs=cell(1,4);
            s=1;
            subpobs(s,:)={'Economically Vulnerable',    dataRD.restrAll&dataRD.esSep==1,'vul','addAnyIniEnd'};s=s+1;
            subpobs(s,:)={'Not Economically vulnerable',    dataRD.restrAll&dataRD.esSep==0,'notVul','addAnyIniEnd'};s=s+1;
            posAll=1;
        case '_sex'
            
            subpobs=cell(1,4);
            s=1;
            subpobs(s,:)={'Female',    dataRD.restrAll&dataRD.female==1,'fem','addAnyIniEnd'};s=s+1;
            subpobs(s,:)={'Male',    dataRD.restrAll&dataRD.female==0,'notFem','addAnyIniEnd'};s=s+1;
            posAll=1;
        case '_maturity'
            
            % Presumably brings a table named policy into the workspace
            % (it is used right below) - confirm against the .mat file.
            load([projectDirData,'/data/chile/auxiliar/policy.mat'])
            policy.grade=policy.grado;
            % anhoSae is the enrollment year, not the application year
            policy.anhoFirstSAE=policy.anhoSae-1;
            
            % Careful: this changes the order of the rows!
            dataRD=outerjoin(dataRD,policy,'keys',{'cod_reg','grade'},'mergeKeys',true,'type','left','rightVariables',{'anhoFirstSAE'});
            dataRD.maturity=dataRD.anho-dataRD.anhoFirstSAE+1;
            assert(all(dataRD.maturity>0&dataRD.maturity<6))
            
            subpobs=cell(1,4);
            s=1;
            subpobs(s,:)={'1st year',    dataRD.restrAll&dataRD.maturity==1,'1year','addAnyIniEnd'};s=s+1;
            subpobs(s,:)={'2nd year',    dataRD.restrAll&dataRD.maturity==2,'2year','addAnyIniEnd'};s=s+1;
            subpobs(s,:)={'3rd+ year',    dataRD.restrAll&dataRD.maturity>=3,'3year','addAnyIniEnd'};s=s+1;
            posAll=1;
            
        otherwise
            error('aca')
    end
    
else
    % Single-year run. The row below has four columns, so the container
    % must be preallocated with four columns as well; a 1-by-3 cell makes
    % the row assignment fail with a size-mismatch error.
    subpobs=cell(1,4);
    s=1;
    subpobs(s,:)={'All',            dataRD.restrAll,'all',''};s=s+1;
    
    % Disabled subpopulations. They only list three columns and would need a
    % fourth (IV variable or '') before being re-enabled.
    %     subpobs(2,:)={'Vulnerable',   dataRD.restrAll&dataRD.esSep==1,'sep'};
    %     subpobs(3,:)={'Non-vulnerable',      dataRD.restrAll&dataRD.esSep==0,'sepNo'};
    %     subpobs(4,:)={'PK',    dataRD.restrAll&dataRD.grade==-1,'gPk'};
    %     subpobs(5,:)={'9th',dataRD.restrAll&dataRD.grade==9,'g9'};
    %     subpobs(6,:)={'Voluntary',   dataRD.restrAll&dataRD.voluntary==1,'vol'};
    %     subpobs(7,:)={'Non-voluntary',      dataRD.restrAll&dataRD.voluntary==0,'volNo'};
end



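% Definition of outcomes for RDs. Each row of `outcomes` is one outcome; the
% columns are: 1) variable name in dataRD, 2) label, 3) short tag used in the
% regression names, 4) panel title (a non-empty value marks the beginning of
% a panel in the table), 5) whether the IV estimate is calculated.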

% Derived outcome: riskExpostEnd minus riskExpostIni.
% The guard stops the script if the input data already ships this column, so
% that this derivation can be deleted ('Borrar esto' = 'delete this'). It has
% to test the same name that is created below; the previous check looked for
% 'DRiskLExpostIniEnd' (stray 'L') and therefore could never trigger.
if(ismember('DRiskExpostIniEnd',dataRD.Properties.VariableNames))
    error('Borrar esto');
else
    dataRD.DRiskExpostIniEnd=dataRD.riskExpostEnd-dataRD.riskExpostIni;
end

% New: enrolled conditional on placed (short name because of stata)
%dataRD.enrolledInAssignedCOA=double(dataRD.enrolledInAssigned);
%dataRD.enrolledInAssignedCOA(dataRD.assignedToPref==0)=nan;
%dataRD.placedInAnyPreferenceNWIE=dataRD.placedInAnyPreferenceNoWaitlistIniEnd;
%dataRD.placedInAnyPreferenceNoRiskIE=dataRD.placedInAnyPreferenceNoRiskIniEnd;


%dataRD.valueAddedEnrolled(dataRD.grade>8)=nan;
%notIn2020=[notIn2020,'enrolledInAssignedCOA'];

% Outcomes for the RDs, one per row. Columns:
%   1) variable name in dataRD
%   2) label
%   3) short tag used to build the regression names
%   4) panel title ('' when the row does not start a panel)
%   5) 1 if the IV estimate is also calculated, 0 otherwise
outcomes={
    'addAnyIniEnd',       'Agrega al menos 1 establecimiento', 'aa',   '', 0
    'schoolsAddedIniEnd', 'Establecimientos agregados',        'sa',   '', 0
    'addAsFirstIniEnd',   'Agrega al principio ',              'af',   '', 0
    'addInBetweenIniEnd', 'Agrega al medio ',                  'ab',   '', 0
    'addAsLastIniEnd',    'Agrega al final ',                  'al',   '', 0
    'DRiskExpostIniEnd',  'Cambio en riesgo',                  'drEP', '', 0
    };

% Former panel title noted next to 'schoolsAddedIniEnd':
%   'A. Comportamiento al postular'
%
% Disabled outcomes, kept for reference (some predate the 5th column):
%   'riskLastDayEnd','Risk final portfolio','rf','';...
%   'placedInAnyPreferenceIniEnd','Asignado en preferencia ','ap','B. Resultado de postulación',0;...
%   'placedInAddedIniEnd','Asignado en EE agregado ','apad','',0;...
%   'placedInOldIniEnd','Placed to old preference','apold','';...
%   'enrolledInAssigned','Enrolled in placed','ea','',1;...
%   'distHomeEnrolled','Distance to enrolled (km)','d','',1;...
%   'valueAddedEnrolled','Value added enrolled|grade<=8','va','',1;...
%   'declineOffer','Rechaza oferta','do','',0;...
%   'enrolledInAddedIniEnd','Enrolled in added','eaad','';...
%   'addAnyUncongestedIniEnd','Add any undersubscribed','aaUncong','D. Congestion-related outcomes',1;...
%   'deltaProbPlacedNoWaitlistsIniEnd','$\Delta$ prob. placed to undersubscribed','dpw','',1;...
%   'deltaProbPlacedNoRiskIniEnd','$\Delta$ prob. placed to zero risk','dpnr','',1;...
%   'placedInAnyPreferenceNWIE','Placed to uncong.','apUncong','',1;...
%   'placedInAddedNoWaitlistIniEnd','Placed to uncong. added pref.','apadUncong','',1;...
%   'enrolledInAddedNoWaitlistIniEnd','Enrolled in uncong. added','eaadUncong','',1;...
%   'addAnyNoRiskIniEnd','Add any zero risk','aaNR','D2. Risk-related outcomes',1;...
%   'placedInAnyPreferenceNoRiskIE','Placed to zero risk','apNR','',1;...
%   'placedInAddedNoRiskIniEnd','Placed to zero risk added pref.','apadNR','',1;...
%   'enrolledInAddedNoRiskIniEnd','Enrolled in zero risk added','eaadNR','',1;...
%   'acceptOffer','Accept offer','ao','';...
%   'preferenciaAsign_1','Order assigned','oa','';...


% outcomes={   % 'riskLastDayIni','True Risk initial attempt','ri','';...
%     %'riskLastDayEnd','True Risk final attempt','rf','';...
%     %'DRiskLastDayIniEnd','$\Delta$ True Risk','dr','';...
%     'riskExpostIni','True Risk initial attempt','riEP','';...
%     'riskExpostEnd','True Risk final attempt','rfEF','';...
%     'DRiskExpostIniEnd','$\Delta$ True Risk ExP','drEP','';...
%     'placedInAnyPreferenceIniEnd','Placed to preference','ap','C. Choice outcome';...
%     'placedInAddedIniEnd','Placed to added preference','apad','';...
%     'placedInOldIniEnd','Placed to old preference','apold','';...
%     };


% In a 2020-only run, drop the outcomes that are not available for 2020.
if(anho==2020)
    availableIn2020=~ismember(outcomes(:,1),notIn2020);
    outcomes=outcomes(availableIn2020,:);
end

% Attach the labels to the table variables.
dataRD.Properties.VariableDescriptions{'riskPopup'}='Predicted placement risk';
for o=1:size(outcomes,1)
    [varName,varLabel]=outcomes{o,1:2};
    dataRD.Properties.VariableDescriptions{varName}=varLabel;
end

% Variable names, labels and short tags must each be unique, since the tags
% are used to build the regression names.
for col=1:3
    assert(allunique(outcomes(:,col)))
end


% Data handed to Stata: the running variable, the outcomes and (added in the
% loop below) one indicator column per subpopulation.
data=dataRD(:,[{'riskPopup'},outcomes(:,1)']);
inAnyPob=false(height(data),1);

% Bandwidth specifications, one per row:
%   1) suffix of the regression name
%   2) rdrobust command for the sharp RD
%   3) rdrobust command for the fuzzy (IV) RD
bwSpecs={
    'full',      'rdrobust %s riskPopup if subpop%i==1, c(.3) p(2) h(.28 .68)', 'rdrobust %s riskPopup if subpop%i==1, fuzzy(%s) c(.3) p(2) h(.28 .68)'
    'local',     'rdrobust %s riskPopup if subpop%i==1, c(.3)  h(.1 .1)',        'rdrobust %s riskPopup if subpop%i==1, fuzzy(%s) c(.3)  h(.1 .1)'
    'localAuto', 'rdrobust %s riskPopup if subpop%i==1, c(.3) ',                 'rdrobust %s riskPopup if subpop%i==1, fuzzy(%s) c(.3) '
    };

% The sharp RD is run for the bandwidth used in the table, for every
% bandwidth when the comparison table is requested, and additionally for
% 'full' (first row) when plots are requested. The IV version is only run
% for the bandwidth used in the table.
isTableBw=strcmp(bwSpecs(:,1),bandwidthTable);
runSharp=isTableBw|tableBandwidthComparison;
runSharp(1)=runSharp(1)||plotRDs;
sharpSpecs=find(runSharp)';
ivSpecs=find(isTableBw)';

% Generate stata commands: column 1 is the regression name, column 2 the
% command.
commands=cell(0,2);

for p=1:size(subpobs,1)
    
    data.(sprintf('subpop%i',p))=subpobs{p,2};
    inAnyPob=inAnyPob|subpobs{p,2};
    
    is2020Subpop=any(strcmp(subpobs{p,3},{'2020','2020comp'}));
    
    for o=1:size(outcomes,1)
        
        % Avoid computing outcomes that are not available:
        noCalcular=is2020Subpop&ismember(outcomes{o,1},notIn2020);
        
        if(not(noCalcular))
            
            for b=sharpSpecs
                regName=sprintf('%s_%i_%s',outcomes{o,3},p,bwSpecs{b,1});
                regCommand=sprintf(bwSpecs{b,2},outcomes{o,1},p);
                commands(end+1,:)={regName,regCommand}; %#ok<SAGROW>
            end
            
            % If with IV:
            if(outcomes{o,5}==1&&~isempty(subpobs{p,4}))
                
                endogenousVar=subpobs{p,4};
                
                for b=ivSpecs
                    regName=sprintf('%s_%i_%s_iv',outcomes{o,3},p,bwSpecs{b,1});
                    regCommand=sprintf(bwSpecs{b,3},outcomes{o,1},p,endogenousVar);
                    commands(end+1,:)={regName,regCommand}; %#ok<SAGROW>
                end
            end
        end
    end
end

% Run Stata on the rows that belong to at least one subpopulation:
data=data(inAnyPob,:);
res=stataCommand(commands,data);


%% Make plots and table
% Reads the regression results in res (one field per regression name) into
% matrices with one row per outcome and one column per subpopulation, and
% optionally draws/exports the RD plots.

cantPobs=size(subpobs,1);
cantRegs=size(outcomes,1);

% Entries stay NaN when the corresponding regression is not in res.
matrix_nl=nan(cantRegs,cantPobs);
matrix_nr=nan(cantRegs,cantPobs);
matrix_b=nan(cantRegs,cantPobs);
matrix_se=nan(cantRegs,cantPobs);

matrix_b_iv=nan(cantRegs,cantPobs);
matrix_se_iv=nan(cantRegs,cantPobs);
matrix_nl_iv=nan(cantRegs,cantPobs);
matrix_nr_iv=nan(cantRegs,cantPobs);

matrix_b_l=nan(cantRegs,cantPobs);
matrix_se_l=nan(cantRegs,cantPobs);

matrix_b_r=nan(cantRegs,cantPobs);
matrix_se_r=nan(cantRegs,cantPobs);

matrix_biasCorr_ci=nan(cantRegs,cantPobs,2);

regNames=fieldnames(res);

% Exporting figures needs dirPlots. Check once, before any figure is drawn,
% instead of failing with an undefined-variable error in the middle of the
% loop.
if(plotRDs&&(savePlotRDs||plotWithZoom)&&~exist('dirPlots','var'))
    error('dirPlots must be defined before exporting RD plots.');
end

% Normal critical value for the 5% two-sided interval (loop invariant).
zCrit=norminv(.975);

for p=1:cantPobs
    for r=1:cantRegs
        
        regName_i=sprintf('%s_%i_%s',outcomes{r,3},p,bandwidthTable);
        if(ismember(regName_i,regNames))
            res_i=res.(regName_i);
            
            % Beta
            matrix_b(r,p)=res_i.tau_cl;
            % Sd Beta
            matrix_se(r,p)=res_i.se_tau_cl;
            
            % Beta left
            matrix_b_l(r,p)=res_i.beta_p_l(1);
            % Sd Beta left
            matrix_se_l(r,p)=sqrt(res_i.V_cl_l(1));
            
            % Beta right
            matrix_b_r(r,p)=res_i.beta_p_r(1);
            % Sd Beta right
            matrix_se_r(r,p)=sqrt(res_i.V_cl_r(1));
            
            % Bias corrected Confidence interval (alpha=5%)
            matrix_biasCorr_ci(r,p,1)=res_i.tau_bc-zCrit*res_i.se_tau_rb;
            matrix_biasCorr_ci(r,p,2)=res_i.tau_bc+zCrit*res_i.se_tau_rb;
            
            % N
            matrix_nl(r,p)=res_i.N_h_l;
            matrix_nr(r,p)=res_i.N_h_r;
            
            % If with IV:
            if(outcomes{r,5}==1&&~isempty(subpobs{p,4}))
                regName_i_iv=sprintf('%s_%i_%s_iv',outcomes{r,3},p,bandwidthTable);
                
                res_i_iv=res.(regName_i_iv);
                % Beta
                matrix_b_iv(r,p)=res_i_iv.tau_cl;
                % Sd Beta
                matrix_se_iv(r,p)=res_i_iv.se_tau_cl;
                
                % N
                matrix_nl_iv(r,p)=res_i_iv.N_h_l;
                matrix_nr_iv(r,p)=res_i_iv.N_h_r;
                
            end
            
            if(plotRDs)
                
                % Load full bandwidth result for plotting
                warning('Add small bandwidth estimate for full bandwidth plot')
                
                res_i=res.(sprintf('%s_%i_%s',outcomes{r,3},p,'full'));
                res_i_table=res.(regName_i);
                
                plotsub=subpobs{p,2};
                
                % The file name is built with a single sprintf: passing the
                % result through sprintf a second time would re-interpret it
                % as a format string and mangle any '%' or '\' in it.
                figure
                plotRD(res_i,dataRD,'subpop',plotsub,'otherPointEstimate',res_i_table);
                if(savePlotRDs)
                    easyExport([dirPlots,sprintf('%s_P_%s_%s.png',anhoStr,outcomes{r,3},subpobs{p,3})]);
                end
                
                if(plotWithZoom)
                    % Plot with zoom
                    figure
                    plotRD(res_i,dataRD,'subpop',plotsub,'newxlim',[.1,.5],'nb',100,'otherPointEstimate',res_i_table);
                    easyExport([dirPlots,sprintf('%s_P_%s_%s_withZoom.png',anhoStr,outcomes{r,3},subpobs{p,3})]);
                end
                
                if(closePlotsWhilePlotting)
                    close all
                end
            end
        else
            % Reported only when the regression is really missing from res.
            % This branch used to hang off the plotRDs test, so the message
            % was printed for every existing regression whenever plots were
            % disabled and never for a missing one.
            fprintf('Ojo: reg %s no existe para subpob %s\n',regName_i,subpobs{p,1});
        end
    end
    
    %vector_Nl(1,p)=sum(dataRD.riskPopup<.3&subpobs{p,2});
    %vector_Nr(1,p)=sum(dataRD.riskPopup>.3&subpobs{p,2});
    
end




%% RD main table
% Layout: one row per outcome plus two trailing rows with the sample sizes on
% each side of the cutoff; two columns per subpopulation (odd: main
% estimate, even: reserved for IV). Columns without any value are dropped.
[nOut,nPob]=size(matrix_b);
nCols=2*nPob;
mainCols=1:2:nCols;
ivCols=2:2:nCols;

matTable=nan(nOut+2,nCols);
matTable_sd=nan(nOut+2,nCols);

% Point estimates, followed by the left/right N of the last outcome.
matTable(:,mainCols)=vertcat(matrix_b,matrix_nl(end,:),matrix_nr(end,:));

% IV columns are currently not filled:
% matTable(:,2:2:end)=[matrix_b_iv;...
%     matrix_nl_iv(find(cell2mat(outcomes(:,5)),1,'first'),:)...
%    ;matrix_nr_iv(find(cell2mat(outcomes(:,5)),1,'first'),:)];

% Standard errors; the two N rows keep NaN.
matTable_sd(1:nOut,mainCols)=matrix_se;
%matTable_sd(:,2:2:end)=[matrix_se_iv;nan(2,size(matrix_se_iv,2))];

% Columns that carry at least one value.
withInfo=~all(isnan(matTable),1);

% Two header rows: subpopulation label on top, 'IV' under the IV columns.
header=repmat({''},2,nCols);
header(1,mainCols)=subpobs(:,1)';
header(1,ivCols)=subpobs(:,1)';
header(2,ivCols)={'IV'};

matTable=matTable(:,withInfo);
matTable_sd=matTable_sd(:,withInfo);
header=header(:,withInfo);

%% Build the options for cell2latex and write the table
stderrCells=mat2cellstr(matTable_sd,'conParentesis',true,'precisionDecimal','%.3f');
rowLabels=[outcomes(:,2);{'NL';'NR'}];

opt=struct;
opt.header=header;
opt.stderrs=stderrCells;
opt.primeracolumna=rowLabels;
%opt.stars=getStars(matTable,matTable_sd);
opt.addColumnNumber=true;
opt.sizeFootnoteFloat='\scriptsize';
opt.verticalAdjustParam=10;
if(anho~=0)
    %opt.columnafantasma=[4];
    opt.title=sprintf('%s Sample - Pop-Up effect',anhoStr);
else
    opt.columnafantasma=1;
end
opt.filaFantasma=size(matTable,1)-2;

% Panels start at the outcomes whose panel-title column (second to last) is
% not missing.
panelTitles=outcomes(:,end-1);
paneles=find(~ismissing(panelTitles));
opt.panel=[num2cell(paneles-1),panelTitles(paneles)];
%opt.alignmentFirstCol={'L{3cm}'};
opt.adjust=true;
opt.positionParameter='H';

opt.file=sprintf('%s%s_tablePopup%s',dirTable,anhoStr,heter);

% Title, label and note are only defined for the run without heterogeneity
% split; they replace any title set above.
switch heter
    case ''
        opt.columnafantasma=1;
        opt.title='Efecto del Pop-Up';
        opt.label='tabPopup';
        
        opt.note='Errores estándar entre paréntesis';
    otherwise
        error('aca')
end

bodyCells=mat2cellstr(matTable,'precisionDecimal','%.3f','revisarFilas',true);
tabla=cell2latex(bodyCells,'opts',opt);
if(compileLatexTable)
    compileLatex(tabla)
end

